# _*_ coding:utf-8 _*_
import os
import requests
import time
from bs4 import BeautifulSoup
import re
import io
import sys
import datetime
from lxml import etree
import random
import json


# Entry page listing one link per province; each link leads to that
# province's airport table, which get_plist() crawls.
url = "http://www.hotelaah.com/jichang.html"

def get_content(url):
    """Fetch *url* and return its HTML decoded as gb2312 text.

    Raises requests.HTTPError for non-2xx responses and
    requests.Timeout if the server stalls.
    """
    headers = {
        # Plain browser UA so the site serves the normal page.
        'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/48.0.2564.116 Safari/537.36'
    }
    # timeout prevents the crawler from hanging forever on a dead connection
    r = requests.get(url, headers=headers, timeout=15)
    r.raise_for_status()
    # The site declares gb2312; requests would otherwise guess ISO-8859-1
    # and mojibake the Chinese text.
    r.encoding = "gb2312"
    return r.text


def write_to_file(ls):
    """Serialize *ls* to data/jichang.json as UTF-8 JSON.

    ensure_ascii=False keeps the Chinese text readable in the file;
    the explicit encoding makes that safe regardless of locale.
    """
    # The original crashed if data/ did not exist yet.
    os.makedirs("data", exist_ok=True)
    with open("data/jichang.json", "w", encoding="utf-8") as fo:
        fo.write(json.dumps(ls, ensure_ascii=False))

def get_plist():
    """Crawl every province page linked from the index and write all
    airport rows to data/jichang.json as {"data": [...]}.

    Each result item has keys: city, name, pinyin, code, distance,
    province (column meanings assumed from the site's table layout —
    confirm against a live page).
    """
    html = get_content(url)
    soup = BeautifulSoup(html, "html.parser")
    # First <ul> on the index page holds the province links.
    table = soup.find_all('ul')[0]
    res_list = []
    for a in table.find_all('a'):
        href = a.get("href")
        if not href:
            # Anchor without a target — nothing to crawl.
            continue
        province = a.text.strip()
        c_html = get_content('http://www.hotelaah.com/' + href)
        # Randomized delay to avoid hammering the server.
        time.sleep(random.randrange(1, 4))
        c_soup = BeautifulSoup(c_html, "html.parser")
        tables = c_soup.find_all('table', attrs={"border": "2"})
        if not tables:
            # Detail page without the expected table: skip instead of
            # crashing the whole crawl with an IndexError.
            continue
        # [1:] skips the header row.
        for tr in tables[0].find_all('tr')[1:]:
            td_list = tr.find_all('td')
            if len(td_list) < 5:
                # Malformed/short row — would raise IndexError below.
                continue
            res_list.append({
                "city": td_list[0].text.strip(),
                "name": td_list[1].text.strip(),
                "pinyin": td_list[2].text.strip(),
                "code": td_list[3].text.strip(),
                "distance": td_list[4].text.strip(),
                "province": province,
            })
    res = {"data": res_list}
    print(res)
    write_to_file(res)

                
        


if __name__ == "__main__":
    # Run the full crawl when executed as a script (not on import).
    get_plist()